# -*- coding: utf-8 -*-
import re
import os
import requests
from bs4 import BeautifulSoup
import json

# Hostnames whose IP addresses are looked up; main() iterates this list in
# order and emits one hosts-file line for every name that resolves.
RAW_URL = [
    "github.global.ssl.fastly.net",
    "assets-cdn.github.com",
    "documentcloud.github.com",
    "gist.github.com",
    "gist.githubusercontent.com",
    "github.githubassets.com",
    "help.github.com",
    "nodeload.github.com",
    "raw.github.com",
    "status.github.com",
    "training.github.com",
    "avatars.githubusercontent.com",
    "avatars0.githubusercontent.com",
    "avatars1.githubusercontent.com",
    "avatars2.githubusercontent.com",
    "avatars3.githubusercontent.com",
    "avatars4.githubusercontent.com",
    "avatars5.githubusercontent.com",
    "avatars6.githubusercontent.com",
    "avatars7.githubusercontent.com",
    "avatars8.githubusercontent.com",
    "favicons.githubusercontent.com",
    "codeload.github.com",
    "github-cloud.s3.amazonaws.com",
    "github-com.s3.amazonaws.com",
    "github-production-release-asset-2e65be.s3.amazonaws.com",
    "github-production-user-asset-6210df.s3.amazonaws.com",
    "github-production-repository-file-5c1aeb.s3.amazonaws.com",
    "githubstatus.com",
    "github.community",
    "media.githubusercontent.com",
    "camo.githubusercontent.com",
    "raw.githubusercontent.com",
    "cloud.githubusercontent.com",
    "user-images.githubusercontent.com",
    "customer-stories-feed.github.com",
    "pages.github.com",
    "api.github.com",
    "live.github.com",
    "githubapp.com",
    "github.dev",
    "github.com"]

# Wrapper for the generated hosts entries. The Start/End marker lines are the
# same strings writehosts() searches for to locate a previously written block,
# so they must stay in sync with that function.
HOSTS_TEMPLATE = """# GitHub Host Start
{content}
# GitHub Host End"""


class IpFetcher:
    """Resolve a hostname to an IPv4 address using public lookup websites.

    Each ``getIpFrom*`` method queries one service and returns the address
    as a string, or ``None`` when the request fails or the response holds no
    usable address. Failures are printed and never raised, so callers can
    simply move on to the next source.
    """

    # Browser-like user agent sent with every lookup request.
    _USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                   'AppleWebkit/737.36(KHTML, like Gecke) '
                   'Chrome/52.0.2743.82 Safari/537.36')
    # No explicit 'Host' header: requests derives it from the URL. A
    # hard-coded value is easy to get wrong (the whatismyipaddress lookup
    # used to send chinaz's host name).
    _HEADERS = {'user-agent': _USER_AGENT}
    # Per-request timeout in seconds.
    _TIMEOUT = 5
    # Dotted-quad candidate; octet ranges are validated in _first_ipv4().
    _IPV4_RE = re.compile(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b")

    def fetch(self, site):
        """Return the first address any source yields for ``site``.

        Sources are tried in a fixed order; the first non-``None`` answer
        wins. Returns ``None`` when every source fails.
        """
        sources = (self.getIpFromChinaz, self.getIpFromipapi,
                   self.getIpFromIpaddress, self.getIpFromWhatismyipaddress)
        for lookup in sources:
            ip = lookup(site)
            if ip is not None:
                return ip

        return None

    @classmethod
    def _first_ipv4(cls, text):
        """Return the first valid IPv4 address found in ``text``, else None."""
        for candidate in cls._IPV4_RE.findall(text):
            # The regex accepts e.g. 999.1.1.1; reject out-of-range octets.
            if all(int(octet) <= 255 for octet in candidate.split(".")):
                return candidate
        return None

    @classmethod
    def _ip_from_elements(cls, elements):
        """Return the address from the last of ``elements`` that contains one.

        ``elements`` are BeautifulSoup tags. Text nodes are joined with a
        space so that adjacent addresses in sibling tags do not run together.
        """
        found = None
        for element in elements:
            ip = cls._first_ipv4(element.get_text(" "))
            if ip is not None:
                found = ip
        return found

    @staticmethod
    def _report_error(site, error):
        """Print the lookup failure for ``site`` (message text is unchanged)."""
        print("查询" + site + " 时出现错误: " + str(error))

    def getIpFromIpaddress(self, site):
        """Look up ``site`` on ipaddress.com; return an IPv4 string or None."""
        url = "https://ipaddress.com/search/" + site
        try:
            res = requests.get(url, headers=self._HEADERS,
                               timeout=self._TIMEOUT)
            soup = BeautifulSoup(res.text, 'html.parser')
            # Search inside the result elements only; scanning the whole page
            # could pick up an unrelated dotted number.
            return self._ip_from_elements(
                soup.find_all('div', class_="comma-separated"))
        except Exception as e:  # best effort: any failure means "no answer"
            self._report_error(site, e)
        return None

    def getIpFromChinaz(self, site):
        """Look up ``site`` on ip.tool.chinaz.com; return an IPv4 string or None."""
        url = "http://ip.tool.chinaz.com/" + site
        try:
            res = requests.get(url, headers=self._HEADERS,
                               timeout=self._TIMEOUT)
            soup = BeautifulSoup(res.text, 'html.parser')
            return self._ip_from_elements(soup.find_all('span', id="IpValue"))
        except Exception as e:  # best effort: any failure means "no answer"
            self._report_error(site, e)
        return None

    def getIpFromWhatismyipaddress(self, site):
        """Look up ``site`` on whatismyipaddress.com; return an IPv4 string or None."""
        url = "https://whatismyipaddress.com/hostname-ip"
        data = {
            "DOMAINNAME": site,
            "Lookup IP Address": "Lookup IP Address"
        }
        try:
            res = requests.post(url, headers=self._HEADERS, data=data,
                                timeout=self._TIMEOUT)
            soup = BeautifulSoup(res.text, 'html.parser')
            return self._ip_from_elements(
                soup.find_all('span', class_="Whwtdhalf w15-0"))
        except Exception as e:  # best effort: any failure means "no answer"
            self._report_error(site, e)
        return None

    def getIpFromipapi(self, site):
        """Look up ``site`` through the ip-api.com JSON API.

        Returns the ``query`` field of a response whose ``status`` is
        ``"success"``, otherwise ``None``.
        """
        url = "http://ip-api.com/json/%s?lang=zh-CN" % (site)
        try:
            res = requests.get(url, headers=self._HEADERS,
                               timeout=self._TIMEOUT)
            payload = json.loads(res.text)
            if payload.get("status") == "success":
                return payload.get("query")
        except Exception as e:  # best effort: any failure means "no answer"
            self._report_error(site, e)
        return None


# Shared module-level fetcher instance used by get_ip().
ip_fetcher = IpFetcher()


def gethosts(hostsfile):
    """Read ``hostsfile`` and return its lines as a list.

    Each line keeps its trailing newline. The file is opened in text mode
    with the platform's default encoding.
    """
    with open(hostsfile) as hosts_fd:
        return list(hosts_fd)


def get_ip(raw_url: str):
    """Resolve ``raw_url`` with the shared fetcher.

    Returns a ``(raw_url, ip)`` tuple; ``ip`` is ``None`` when no source
    produced an address. A successful lookup is also printed as
    ``host<TAB>ip``.
    """
    resolved = ip_fetcher.fetch(raw_url)
    if resolved is None:
        return raw_url, None

    print(raw_url + "\t" + resolved)
    return raw_url, resolved


def writehosts(old_hosts, content, output_path="new_hosts"):
    """Write a hosts file with the GitHub block replaced by ``content``.

    Args:
        old_hosts: Lines of the existing hosts file (as returned by
            ``gethosts``). The list is not modified.
        content: The new block, including its own Start/End marker lines.
        output_path: File to write; defaults to ``new_hosts`` in the
            current working directory.

    If ``old_hosts`` already contains a block delimited by the
    ``# GitHub Host Start`` / ``# GitHub Host End`` markers, the new block
    takes its place. Otherwise the new block is appended at the end.

    Raises:
        ValueError: A Start marker is present without a following End
            marker, so the extent of the old block cannot be determined.
    """
    start_marker = '# GitHub Host Start'
    end_marker = '# GitHub Host End'

    # Locate the first Start marker and the first End marker at or after it.
    start = end = None
    for index, line in enumerate(old_hosts):
        if start is None and start_marker in line:
            start = index
        if start is not None and end_marker in line:
            end = index
            break

    if start is not None and end is None:
        # Guessing the block's extent could delete unrelated entries.
        raise ValueError(
            "found '%s' without a matching '%s'" % (start_marker, end_marker))

    # Terminate the block so whatever follows it starts on its own line.
    block = content
    if block and not block.endswith("\n"):
        block += "\n"

    if start is None:
        # No previous block (e.g. first run): append after the existing lines.
        kept = list(old_hosts)
        if kept and not kept[-1].endswith("\n"):
            kept[-1] += "\n"
        new_lines = kept + [block]
    else:
        # Put the new block exactly where the old one was.
        new_lines = list(old_hosts[:start]) + [block] + list(old_hosts[end + 1:])

    # Join everything and write the result out.
    with open(output_path, 'w') as fd:
        fd.write(''.join(new_lines))
    print('复写成功')


def main():
    """Resolve every host in RAW_URL and write the refreshed hosts file.

    Hosts that cannot be resolved are skipped. The existing entries are read
    from /etc/hosts and the result is handed to writehosts().
    """
    entries = []
    for raw_url in RAW_URL:
        print("\nfetching ip of ", raw_url)
        host_name, ip = get_ip(raw_url)
        if ip is not None:
            # IP padded to a fixed-width column, followed by the host name.
            entries.append(ip.ljust(30) + host_name + "\n")

    hosts_content = HOSTS_TEMPLATE.format(content="".join(entries))
    print("find new hosts:\n", hosts_content)
    writehosts(gethosts("/etc/hosts"), hosts_content)


# Run the update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
